/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

syntax = "proto3";

package tensorflow;

import "google/protobuf/any.proto";
import "tensorflow/core/framework/cost_graph.proto";
import "tensorflow/core/framework/device_attributes.proto";
import "tensorflow/core/framework/graph.proto";
import "tensorflow/core/framework/step_stats.proto";
import "tensorflow/core/framework/tensor.proto";
import "tensorflow/core/framework/tensor_shape.proto";
import "tensorflow/core/framework/types.proto";
import "tensorflow/core/protobuf/config.proto";
import "tensorflow/core/protobuf/debug.proto";
import "tensorflow/core/protobuf/error_codes.proto";
import "tensorflow/core/protobuf/named_tensor.proto";
import "tensorflow/core/protobuf/tensorflow_server.proto";

option cc_enable_arenas = true;
option java_outer_classname = "WorkerProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.distruntime";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

////////////////////////////////////////////////////////////////////////////////
//
// GetStatus method request/response messages
//
////////////////////////////////////////////////////////////////////////////////

// Request for WorkerService.GetStatus. Intentionally empty; kept as a message
// so fields can be added later without changing the RPC signature.
message GetStatusRequest {}

// Reports the devices available on the worker that served the request.
message GetStatusResponse {
  // One entry per device owned by the responding worker.
  repeated DeviceAttributes device_attributes = 1;
}

////////////////////////////////////////////////////////////////////////////////
//
// CreateWorkerSession method request/response messages
//
// Creates per-session state on the worker, identified by session_handle.
//
////////////////////////////////////////////////////////////////////////////////

// Creates per-session state on a worker, keyed by session_handle.
message CreateWorkerSessionRequest {
  // Sessions are identified by a given handle.
  string session_handle = 1;

  // Defines the configuration of a TensorFlow worker.
  ServerDef server_def = 2;

  // If true, any resources such as Variables used in the session will not be
  // shared with other sessions.
  bool isolate_session_state = 3;

  // The device attributes of all the devices in the cluster.
  repeated DeviceAttributes cluster_device_attributes = 4;

  // The master task name from which the request is sent.
  string master_task = 5;

  // The incarnation ID of the master task local CPU device.
  // If the target worker already has a WorkerSession created previously with
  // the same master task name but a different incarnation, it usually indicates
  // that the previous master failed before deleting the WorkerSession on the
  // worker. To prevent memory leaks, the worker should garbage collect the old
  // WorkerSessions.
  int64 master_incarnation = 6;

  reserved 7;  // Deprecated config that is embedded within server_def now.
}

// Empty response; success or failure is conveyed via the RPC status.
message CreateWorkerSessionResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// DeleteWorkerSession method request/response messages
//
// Deletes all worker-side state associated with the given session handle.
//
////////////////////////////////////////////////////////////////////////////////

// Asks a worker to delete all state associated with one session.
message DeleteWorkerSessionRequest {
  // Sessions are identified by a given handle.
  string session_handle = 1;
}

// Empty response; success or failure is conveyed via the RPC status.
message DeleteWorkerSessionResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// RegisterGraph method request/response messages
//
// For each session, after the master placed every node on a device,
// it partitions the whole graph into many subgraphs. All the nodes in
// a subgraph were in the same worker, but potentially on many devices
// owned by that worker (e.g. cpu0, plus gpu0, gpu1, ..., gpu7). The
// master registers subgraphs for a worker before running any steps. A
// successful registration returns a graph handle to be used in later
// RunGraph requests.
//
////////////////////////////////////////////////////////////////////////////////

// Registers one partitioned subgraph on a worker so it can later be executed
// via RunGraph using the returned graph_handle.
message RegisterGraphRequest {
  // Subgraphs are scoped within one session.
  string session_handle = 1;

  // Set to true if `CreateWorkerSession` was called for `session_handle`.
  bool create_worker_session_called = 6;

  // "graph_def" has the subgraph of nodes for this worker, with each node
  // having its device_name filled in.
  GraphDef graph_def = 2;

  // True iff the graph (before partitioning) contains control flow nodes.
  //
  // As of 01/11/2015, this is no longer set by clients.
  bool has_control_flow = 3 [deprecated = true];

  // Configuration options for the session in which this graph was created.
  GraphOptions graph_options = 4;

  // Field(s) used by TensorFlow Debugger (tfdbg).
  DebugOptions debug_options = 5;

  // If graph_def contains any collective ops this must be a positive
  // integer used to coordinate execution with other graphs.  All
  // graphs in a distributed execution with the same
  // collective_graph_key will coordinate to use the same step_id
  // concurrently so that BufRendezvous entries will make the correct
  // values accessible.
  int64 collective_graph_key = 7;

  // ConfigProto from the session in which this graph was created.
  // Contains additional parameters beyond graph_options, including
  // the name of the requested executor.
  ConfigProto config_proto = 8;
}

// Result of a successful RegisterGraph call.
message RegisterGraphResponse {
  // If the registration succeeds, returns an opaque graph_handle to
  // the master. The master calls RunGraph with graph_handle to
  // compute different steps.
  string graph_handle = 1;
}

////////////////////////////////////////////////////////////////////////////////
//
// DeregisterGraph method request/response messages
//
// The master deregisters the given graph_handle when the graph is no
// longer needed (e.g., the overall graph is re-scheduled and nodes
// are re-placed).
//
// The worker deregisters a graph_handle automatically according to a
// TTL-based policy in case of master restarts.
//
////////////////////////////////////////////////////////////////////////////////

// Releases a previously registered graph on the worker.
message DeregisterGraphRequest {
  // The session_handle used when registering the graph. If session_handle is
  // empty, a single global namespace is used.
  string session_handle = 2;

  // Set to true if `CreateWorkerSession` was called for `session_handle`.
  bool create_worker_session_called = 3;

  // REQUIRED: graph_handle must be returned by a RegisterGraph call
  // to the same WorkerService.
  string graph_handle = 1;
}

// Empty today; success or failure is conveyed via the RPC status.
message DeregisterGraphResponse {
  // TODO(mrry): Optionally add summary stats for the graph.
}

////////////////////////////////////////////////////////////////////////////////
//
// CleanupAll method request/response messages
//
////////////////////////////////////////////////////////////////////////////////

// Asks the worker to release resources held in resource containers.
message CleanupAllRequest {
  // A list of container names.
  //
  // If 'container' is not empty, releases resources in the given
  // containers in all devices.
  //
  // If 'container' is empty, releases resources in the default
  // container in all devices.
  repeated string container = 1;
}

// Empty response; success or failure is conveyed via the RPC status.
message CleanupAllResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// RunGraph request / response messages
//
// The worker executes all subgraphs registered under graph_handle.
// RunGraph returns after the execution finishes or an error is
// encountered.
// A sequence of RunGraphRequests with is_partial set may be sent to RunGraph for
// partial graph execution.
//
////////////////////////////////////////////////////////////////////////////////

// Options specific to the execution of a single step.
// Options specific to the execution of a single step.
message ExecutorOpts {
  // If true, collect per-node cost data for the step (returned in
  // RunGraphResponse.cost_graph).
  bool record_costs = 1;
  // If true, collect execution timing statistics (returned in
  // RunGraphResponse.step_stats).
  bool record_timeline = 3;
  // If true, return the per-device partition graphs that were executed
  // (RunGraphResponse.partition_graph).
  bool record_partition_graphs = 4;
  // If true, error messages raised on tensor-allocation failure include
  // details of current allocations. NOTE(review): exact report contents are
  // defined by the runtime, not visible here.
  bool report_tensor_allocations_upon_oom = 5;
}

// Executes one step of a registered (sub)graph on the worker.
message RunGraphRequest {
  // session_handle is the master-generated unique id for this session.
  // If session_handle is non-empty, it must be the same as used when
  // registering the graph. If it is empty, a single global namespace is used to
  // search for the graph_handle.
  string session_handle = 8;

  // Set to true if `CreateWorkerSession` was called for `session_handle`.
  bool create_worker_session_called = 10;

  // REQUIRED: graph_handle must be returned by a RegisterGraph call
  // to the same WorkerService.
  string graph_handle = 1;

  // A unique ID to distinguish different runs of the same graph.
  //
  // The master generates a global unique `step_id` to distinguish
  // different runs of the graph computation. Subgraphs communicate
  // (e.g., send/recv ops) with each other using `step_id` to
  // distinguish tensors generated by different runs.
  int64 step_id = 2;

  // Options for this step.
  ExecutorOpts exec_opts = 5;

  // Runs the graph.
  //
  // Sends the tensors in "send" into the graph before the run and
  // fetches the keys into `RunGraphResponse.recv` after the run.
  repeated NamedTensorProto send = 3;
  repeated string recv_key = 4;

  // True if the RunGraphRequest is a partial run request.
  bool is_partial = 6;
  // True if this is the last partial run request in a sequence of requests.
  bool is_last_partial_run = 7;

  // If true then some errors, e.g., execution errors that have long
  // error messages, may return an OK RunGraphResponse with the actual
  // error saved in the status_code/status_error_message fields of the
  // response body. This is a workaround since the RPC subsystem may
  // truncate long metadata messages.
  bool store_errors_in_response_body = 9;

  // Unique identifier for this request. Every RunGraphRequest must have a
  // unique request_id, and retried RunGraphRequests must have the same
  // request_id. If request_id is zero, retry detection is disabled.
  //
  // Retried RunGraphRequests are problematic because they may issue a
  // RecvTensor that will have no corresponding sender and will wait forever.
  // Workers use request_ids to reject retried RunGraph requests instead of
  // waiting forever.
  int64 request_id = 11;

  // Next: 12
}

// Results of one RunGraph step: fetched tensors plus optional statistics.
message RunGraphResponse {
  // A list of tensors corresponding to those requested by
  // `RunGraphRequest.recv_key`.
  repeated NamedTensorProto recv = 1;

  // If the request asked for execution stats, the cost graph, or the partition
  // graphs, these are returned here.
  // TODO(suharshs): Package these in a RunMetadata instead.
  StepStats step_stats = 2;
  CostGraphDef cost_graph = 3;
  repeated GraphDef partition_graph = 4;

  // If store_errors_in_response_body is true in the request, then
  // optionally the server may return an OK status for the RPC and
  // fill the true status into the fields below, to allow for messages
  // that are too long to fit in metadata.
  error.Code status_code = 5;
  string status_error_message = 6;
}

////////////////////////////////////////////////////////////////////////////////
//
// CleanupGraph method request/response messages
//
// After the master receives RunGraph responses from all workers, the
// master instructs every worker to cleanup any remaining state of a
// step (e.g. tensors buffered by a `Send` op but not picked up by
// other workers). The master does not necessarily need to wait for
// completion of CleanupGraph calls.
//
// Workers should cleanup step states automatically according to a
// TTL-based policy in case of master restarts.
//
////////////////////////////////////////////////////////////////////////////////

// Directs a worker to release any remaining per-step state for a step.
message CleanupGraphRequest {
  // The step whose worker-side state (e.g. tensors buffered by a `Send` op
  // but never received) should be released.
  int64 step_id = 1;
}

// Empty response; success or failure is conveyed via the RPC status.
message CleanupGraphResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// RecvTensor method request/response messages
//
////////////////////////////////////////////////////////////////////////////////

// Requests one tensor, identified by (step_id, rendezvous_key), from the
// worker that produces it.
message RecvTensorRequest {
  // The step in which the tensor will be produced.
  //
  // REQUIRED: This must eventually correspond to the `step_id` passed
  // into a RunGraph call on the same WorkerService.
  int64 step_id = 1;

  // A key identifying the channel to receive tensors from. A RecvTensor request
  // retrieves one tensor from the channel, but multiple tensors can be sent and
  // received over the same channel with multiple RecvTensor requests. See
  // rendezvous.h for details.
  string rendezvous_key = 2;

  // If true, use an out-of-band DMA mechanism to transfer the
  // received tensor.
  bool dma_ok = 3;

  // Optional information on client-side device locality.
  DeviceLocality client_locality = 4;

  // Optional information on server-side device locality.
  DeviceLocality server_locality = 5;

  // Optional information needed by the RPC subsystem.
  google.protobuf.Any transport_options = 6;

  // Unique identifier for this request. Every RecvTensorRequest must have a
  // unique request_id, and retried RecvTensorRequests must have the same
  // request_id. If request_id is zero, retry detection and response cache
  // are disabled.
  //
  // Retried RecvTensorRequests are problematic because a RecvTensor with no
  // corresponding sender will wait forever, and the tensor may have been
  // delivered to a previous retry. Workers use request_ids to reject retried
  // RecvTensor requests instead of waiting forever.
  int64 request_id = 7;
}

// Carries the requested tensor plus transfer metadata.
message RecvTensorResponse {
  // The tensor as a proto.
  TensorProto tensor = 1;

  // If true, this tensor was the output of a dead node, and the
  // content is invalid.
  bool is_dead = 2;

  // The time at which tensor was available and started to be returned.
  // NOTE(review): presumably microseconds since the Unix epoch — confirm
  // against the sender implementation.
  int64 send_start_micros = 3;

  // Optional additional information about how to receive the tensor,
  // e.g. in the event that `RecvTensorRequest.dma_ok` was true.
  google.protobuf.Any transport_options = 4;

  // Whether the receiver should send a MarkRecvFinishedRequest to the sender
  // to ack the message.
  bool require_ack = 5;
}

// Message for managing the response cache maintained on the sender side.
// Currently only used by the gRPC worker service.
message MarkRecvFinishedRequest {
  // request_id of the original receive request being acknowledged, so the
  // sender can evict the corresponding response-cache entry (see require_ack
  // in RecvTensorResponse / RecvBufResponse).
  int64 request_id = 1;
}

// Empty response; success or failure is conveyed via the RPC status.
message MarkRecvFinishedResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// Logging method request/response messages
//
// NOTE(mrry): This feature is not supported in the open-source
// version, and these messages are expected to change.
//
////////////////////////////////////////////////////////////////////////////////

// Out-of-band request to begin or end logging, or
// to retrieve logs for particular steps.
message LoggingRequest {
  // If true, RPC logging will be enabled.
  bool enable_rpc_logging = 1;

  // If true, RPC logging will be disabled.
  // NOTE(review): enable/disable are separate bools, presumably because proto3
  // bools lack presence — confirm before consolidating.
  bool disable_rpc_logging = 4;

  // If true, discard any saved logging data (for all steps).
  bool clear = 2;

  // When set, requests all saved log data pertaining to the step.
  // Any log data retrieved is eliminated from the store and cannot be
  // retrieved again.
  repeated int64 fetch_step_id = 3;
}

// StepStats tagged with the step that produced them.
message LabeledStepStats {
  // The step these statistics belong to.
  int64 step_id = 1;
  // Execution statistics collected for that step.
  StepStats step_stats = 2;
}

// Saved log data returned for the steps named in LoggingRequest.fetch_step_id.
message LoggingResponse {
  // One entry per step for which saved stats were available.
  repeated LabeledStepStats step = 1;
}

////////////////////////////////////////////////////////////////////////////////
//
// Tracing method request/response messages
//
// NOTE(mrry): This feature is not supported in the open-source
// version, and these messages are expected to change.
//
////////////////////////////////////////////////////////////////////////////////

// Options controlling what a distributed tracing run captures.
message TraceOpts {
  // Length of the trace to be taken, in seconds.
  double duration = 1;
  // If true, capture step profile locally in each worker. Currently
  // unimplemented.
  bool use_step_profiler = 2;
  // If true, capture kernel events from each worker.
  bool use_kernel_profiler = 3;
  // If true, capture extended profiling events from TensorFlow process.
  bool use_extended_profiler = 4;
  // If true, capture GPU profiling events locally on each
  // machine. Currently unimplemented.
  bool use_gpu_profiler = 5;
  // If true, collect sampled profile events. Currently unimplemented.
  bool use_sample_profiler = 6;
}

// Out-of-band request to configure distributed tracing.
message TracingRequest {
  // What to capture and for how long; see TraceOpts.
  TraceOpts options = 1;
}

// Empty response; success or failure is conveyed via the RPC status.
message TracingResponse {}

////////////////////////////////////////////////////////////////////////////////
//
// Raw data transfers in support of Collective Ops.
// These methods are experimental and subject to change.
//
// The intention is to allow collectives to take advantage of the most
// efficient methods available on a platform, e.g. RDMA, and not be
// constrained to use the RPC system in use by other methods.
//
////////////////////////////////////////////////////////////////////////////////

// Requests a raw buffer transfer from the server's BufRendezvous, in support
// of collective ops.
message RecvBufRequest {
  // Use of the fields below may vary by implementation.  For example
  // the buf_ptr and num_bytes may be set only for local operations and
  // not sent on the wire, or only sent on the wire in one direction.

  // Used at server side to find the correct BufRendezvous.
  int64 step_id = 1;

  // Arbitrary string identifying a BufRendezvous entry.
  string buf_rendezvous_key = 2;

  // Size of value expected, must agree with BufRendezvous entry.
  int64 num_bytes = 3;

  // When RDMA is in use, address of destination field on client.
  fixed64 buf_ptr = 4;

  // Optional information on client-side device locality.
  DeviceLocality client_locality = 5;

  // Optional information on server-side device locality.
  DeviceLocality server_locality = 6;

  // Optional, implementation-specific data.
  google.protobuf.Any transport_options = 7;
  // For annotating timeline and device incarnation check.
  string src_device = 8;
  // Optional, for annotating the timeline.
  string dst_device = 9;

  // Depending on the RPC system in use, it may be necessary to set this
  // id to detect resends of RPCs where the server is not aware that
  // the prior RPC failed.
  int64 request_id = 10;

  // Incarnation number of the source device, used to detect worker failures.
  uint64 src_incarnation = 11;
}

// Result of a RecvBuf transfer.
message RecvBufResponse {
  // Use of the fields below may vary by implementation.  Comments give
  // intended use.

  fixed64 buf_ptr = 1;  // Address of source field on server.
  int64 num_bytes = 2;  // Byte length of buf_ptr field, if set.
  bool is_dead = 3;     // True if value is 'dead' like a tensor.
  // Optional, implementation-specific data.
  google.protobuf.Any transport_options = 4;
  // Optional, for timeline.
  int64 send_start_micros = 5;

  // Whether the receiver should send a MarkRecvFinishedRequest to the sender
  // to ack the message.
  bool require_ack = 6;
}

////////////////////////////////////////////////////////////////////////////////
//
// Collective Op dynamic group resolution messages.
//
////////////////////////////////////////////////////////////////////////////////

// Supplies one or more device names as members of the group identified by
// group_key.  Service will respond when all group_size devices become known.
// All devices in group must have same type.
message CompleteGroupRequest {
  // Key identifying the group being formed.
  int32 group_key = 1;
  // Number of devices expected in the group; the service responds once this
  // many devices have become known.
  int32 group_size = 2;
  // Device type shared by all members (all devices in a group must have the
  // same type).
  string device_type = 3;
  // Kind of collective the group will perform; value semantics are defined by
  // the collective implementation, not in this file.
  int32 collective_type = 5;
  // Attributes of the device joining the group.
  DeviceAttributes device_attributes = 6;

  reserved 4;  // Previously used field number; do not reuse.
}

// Gives the complete membership of the group identified by group_key.
message CompleteGroupResponse {
  // Key of the now-complete group.
  int32 group_key = 1;
  // Number of devices in the group.
  int32 group_size = 2;
  // Common device type of all group members.
  string device_type = 3;
  int32 num_tasks = 4;  // number of distinct tasks hosting the devices
  // Opaque, implementation-specific key for the group's communicator.
  bytes communicator_key = 7;
  // Attributes of every device in the group.
  repeated DeviceAttributes device_attributes = 8;

  reserved 5, 6;  // Previously used field numbers; do not reuse.
}

// Supplies data about one collective op belonging to the instance identified
// by instance_key and step_id.  Service will respond when all group_size ops
// have become known.  Most of the data being sent is for correctness checking,
// to ensure that all ops in the instance share common attributes.
message CompleteInstanceRequest {
  // Name of the collective op instance.
  string name = 1;
  // Kind of collective operation; value semantics are defined by the
  // collective implementation, not in this file.
  int32 type = 2;
  // Element type of the tensor the collective operates on (checked for
  // consistency across the instance).
  DataType data_type = 3;
  // Shape of the tensor the collective operates on (checked for consistency
  // across the instance).
  TensorShapeProto shape = 4;
  // Key of the already-resolved group this instance belongs to.
  int32 group_key = 5;
  // Number of ops expected in the instance.
  int32 group_size = 6;
  // Key identifying this collective instance within the group.
  int32 instance_key = 7;
  // Device type; expected to be consistent across the instance.
  string device_type = 8;
  // Subdivision offsets; semantics defined by the collective implementation.
  repeated int32 subdiv_offset = 9;
  // Device on which this op runs.
  string device = 10;
  // True if this op supplies the data (e.g. the broadcast source; see
  // CompleteInstanceResponse.source_rank).
  bool is_source = 11;
  // Step in which this collective executes.
  int64 step_id = 12;
}

// Confirms that every op in the instance has consistently declared itself.
// Also gives the source_rank in case of broadcast.
message CompleteInstanceResponse {
  // Echo of the resolved instance's key.
  int32 instance_key = 1;
  // Rank of the data source within the group, for broadcast collectives.
  int32 source_rank = 2;
  reserved 3;  // Previously used field number; do not reuse.
}

// Request for next agreed-upon step_id for the specified graph_keys.
// This is used to enable multiple graphs containing nodes from
// a common collective instance to coordinate using the same step_ids.
message GetStepSequenceRequest {
  // Graph keys for which the next agreed-upon step_id is requested.
  repeated int64 graph_key = 1;
}

// Pairs a graph_key with the next step_id to use for it.
message StepSequence {
  // The graph this entry applies to.
  int64 graph_key = 1;
  // Next agreed-upon step_id for graph_key.
  int64 next_step_id = 2;
}

// Next valid step_ids for one or more graph_keys.
message GetStepSequenceResponse {
  // One StepSequence per requested graph_key.
  repeated StepSequence step_sequence = 1;
}
